# -*- coding: utf-8 -*-
"""
Code to replicate the simulations in The Consequences of Using a Top 5/RCV System in Nevada by Gelman, Pritsos and Reilly
Created: March 5, 2024
Most recent update: September 10, 2024

"""

import pyrankvote
from pyrankvote import Candidate, Ballot
import os
import pandas as pd
import numpy as np
import random
import time

# Read the two survey samples; both files store the answers as text labels.
dy_text = pd.read_csv('/replication/dynata_text.csv')  # Dynata sample
cv_text = pd.read_csv('/replication/convenience_text.csv')  # convenience sample

# Stack both samples into one frame, discard every row flagged omit == 1
# (8 observations according to the original note), and renumber the rows
# 0..n-1 so the old per-file row labels are not carried along.
frames = [dy_text, cv_text]
data_text = pd.concat(frames)
data_text = data_text[data_text['omit'] != 1].reset_index(drop=True)

# Keep only the text that follows the last '>' in each gov22_primary answer.
data_text['gov22_primary'] = data_text['gov22_primary'].str.split('>').str[-1]

# Name each ranking question (Q50_1 .. Q50_5) after the candidate it refers to.
question_to_candidate = {
    'Q50_1': "davis",
    'Q50_2': 'gilbert',
    'Q50_3': 'heller',
    'Q50_4': 'lombardo',
    'Q50_5': 'sisolak',
}
data_text = data_text.rename(columns=question_to_candidate)

# Recode the text labels as integer ranks 1-5; any other answer becomes NaN.
rank_codes = {'1st choice': 1, '2nd choice': 2, '3rd choice': 3, '4th choice': 4, '5th choice': 5}
for candidate_column in question_to_candidate.values():
    data_text[candidate_column] = data_text[candidate_column].map(rank_codes)

#Create partisanship variable - this preserves independents as non-partisans.
# Each key is the 0/1 indicator column to create; each value is the Q1 answer
# that switches the indicator on. Rows with any other Q1 answer get 0 everywhere.
party_flags = {
    'democrat': 'Democrat',
    'republican': 'Republican',
    'independent': 'Independent',
    'third_party': 'Something else',
}
for flag, answer in party_flags.items():
    data_text[flag] = np.where(data_text['Q1'] == answer, 1, 0)
    # Report the group size. The bare `.sum()` expressions used previously
    # produced no output when the file was run as a script.
    print(flag, data_text[flag].sum())

#Single category for party ID
# 0 = democrat, 1 = republican, 2 = independent, 3 = third_party (the dict
# order above); rows matching none of the four keep NaN in party_id.
for code, flag in enumerate(party_flags):
    data_text.loc[data_text[flag] == 1, 'party_id'] = code

#Create the number of hypothetical partisan splits
# Each split distributes 95 percentage points among Democrats, Republicans and
# independents (95% voters assumed in splits, 5% as third party). A candidate
# split is produced by assigning each of the 95 points to a party uniformly at
# random and is kept only when every share lies inside its allowed range.
#
# Duplicate splits are kept on purpose: the ranges below allow at most
# 21 x 21 distinct (dem, rep) pairs, so demanding unique splits could never
# reach the requested number. (An earlier `new_split==all_splits` branch
# compared a single split to the whole list of splits and could never be true;
# it has been removed.)
all_splits = []
party_split = ['dem', 'rep', 'ind']
n_splits_wanted = 10000  #How many partisan splits you want
points_per_split = 95
while len(all_splits) < n_splits_wanted:
    split = [random.choice(party_split) for _ in range(points_per_split)]
    dem = split.count('dem')
    rep = split.count('rep')
    ind = split.count('ind')
    # Reject draws with any share outside its plausible range.
    if not (25 <= dem <= 45 and 25 <= rep <= 45 and 15 <= ind <= 30):
        continue
    all_splits.append([dem, rep, ind])
    print("case", len(all_splits))

        
#Primary winner
# For every partisan split, draw 500 weighted samples of 316 respondents and,
# in each sample, record the five most frequent gov22_primary answers together
# with their counts. primary_results ends up with one row per (split, sample,
# placed answer); its 'index' column holds the placement (1 = most frequent).
third = 5
sim = 1
splits = 0
per_split_results = []
for i in all_splits:
    start = time.time()
    splits = splits + 1
    dem, rep, ind = i[0], i[1], i[2]
    print(dem, rep, ind, splits)

    # Sampling weight of each respondent = share of their party in this split.
    # The weights do not change between the 500 draws, so set them once here.
    data_text.loc[data_text['democrat']==1, 'prop'] = dem/100
    data_text.loc[data_text['republican']==1, 'prop'] = rep/100
    data_text.loc[data_text['independent']==1, 'prop'] = ind/100
    data_text.loc[data_text['third_party']==1, 'prop'] = third/100

    split_outcomes = []
    for count in range(1, 501):
        # random_state=count: the same 500 seeds are reused for every split.
        data2 = data_text.sample(n=316, random_state=count, weights=data_text['prop'])
        gov22_primary_counts = data2.groupby(['gov22_primary']).size().reset_index(name='counts')
        outcome = gov22_primary_counts.nlargest(5, ['counts'])
        outcome = outcome.sort_values(by=['counts'], ascending=False)
        # Number the rows 1..k and expose that placement as the 'index' column.
        outcome.index = np.arange(1, len(outcome)+1)
        outcome = outcome.reset_index()
        outcome['sim'] = sim
        outcome['dem'] = dem
        outcome['rep'] = rep
        outcome['ind'] = ind  # previously stored `rep` here by mistake
        split_outcomes.append(outcome)

    # One concat per split keeps memory bounded without repeatedly copying
    # an ever-growing frame.
    per_split_results.append(pd.concat(split_outcomes))
    sim = sim+1
    end = time.time()
    print(end-start)

# Single final concat; fall back to an empty frame when there were no splits.
primary_results = pd.concat(per_split_results) if per_split_results else pd.DataFrame()

# Number of simulated primaries per Democratic share: rows are counted per
# 'dem' value and divided by 5.
# NOTE(review): the /5 assumes every simulation contributed exactly five rows.
advance2 = (
    primary_results.groupby(['dem'])['sim']
    .count()
    .reset_index()
    .rename(columns={'sim': "total"})
)
advance2['total'] = advance2['total'] / 5

# How many times each gov22_primary answer was recorded, per Democratic share.
advance3 = primary_results.groupby(['gov22_primary', 'dem'])['index'].count().reset_index()

# Share of simulations at each Democratic share in which the answer was recorded.
primary_winners = advance3.merge(advance2, how='inner', on='dem')
primary_winners['win_perc'] = primary_winners['index'] / primary_winners['total']

# All CSV files written from here on (with relative names) land in /replication/.
os.chdir('/replication/')
primary_winners.to_csv('gov22_primary.csv', index=False)

#This is total counts of winning
# Per-answer row counts over all simulations. The 'sim' count is renamed to
# 'total' and scaled by the hard-coded 5000000, which matches the 10000 splits
# x 500 draws used earlier in this script and must be updated if those change.
advance0 = (
    primary_results.groupby(['gov22_primary'])
    .count()
    .reset_index()
    .rename(columns={'sim': "total"})
)
advance0['total'] = advance0['total'] / 5000000
advance0.to_csv('gov22_primary_agg.csv', index=False)

#Run the ranked choice outcomes
# For every partisan split, draw 500 weighted samples of 316 respondents, run an
# instant-runoff election on each sample's rankings, and store how many of the
# 500 elections each candidate won. `results` gets one row per split.
candidate_names = ['davis', 'gilbert', 'heller', 'lombardo', 'sisolak'] #CHANGE
# One Candidate object per name, reused for every ballot and every election.
candidates = [Candidate(name) for name in candidate_names]
candidate_lookup = dict(zip(candidate_names, candidates))


def _ballots_from_sample(sample):
    """Build one pyrankvote Ballot per respondent of ``sample``.

    ``sample`` must contain one numeric rank column per entry of
    ``candidate_names`` (NaN = candidate not ranked). Ballots list the
    candidates in ascending rank order. Unranked candidates are left off the
    ballot. Overvotes - two or more candidates given the same rank by the same
    respondent - are skipped, and the ballot continues with the next rank.
    Respondents who ranked nobody produce no ballot.

    NOTE: the previous inline code wrote an overvote marker into the rank
    column but tested the candidate name for it, so tied ranks were never
    actually skipped. Results can differ from that version if the data
    contain tied ranks.
    """
    wide = sample[candidate_names].reset_index()
    long_form = pd.melt(wide, id_vars='index', value_vars=candidate_names)
    long_form = long_form.sort_values(by=['index', 'value'])
    long_form = long_form.dropna(subset=['value'])
    overvoted = long_form.duplicated(subset=['index', 'value'], keep=False)

    ballots = []
    ranked_candidates = []
    last_ballot_id = None
    rows = zip(long_form['index'].tolist(), long_form['variable'].tolist(), overvoted.tolist())
    for ballot_id, candidate_name, is_overvote in rows:
        if ballot_id != last_ballot_id:
            # Rows are sorted by respondent, so a new id closes the previous ballot.
            if last_ballot_id is not None:
                ballots.append(Ballot(ranked_candidates))
            ranked_candidates = []
            last_ballot_id = ballot_id
        if is_overvote:
            continue
        ranked_candidates.append(candidate_lookup[candidate_name])
    if last_ballot_id is not None:
        ballots.append(Ballot(ranked_candidates))
    return ballots


result_rows = []
third = 5
sim = 1
splits = 0
for i in all_splits:
    dem, rep, ind = i[0], i[1], i[2]
    splits = splits + 1
    print(dem, rep, ind, splits)

    # Sampling weight of each respondent = share of their party in this split.
    # The weights do not change between the 500 draws, so set them once here.
    data_text.loc[data_text['democrat']==1, 'prop'] = dem/100
    data_text.loc[data_text['republican']==1, 'prop'] = rep/100
    data_text.loc[data_text['independent']==1, 'prop'] = ind/100
    data_text.loc[data_text['third_party']==1, 'prop'] = third/100

    champs = []
    for count in range(1, 501):
        # random_state=count: the same 500 seeds are reused for every split.
        data2 = data_text.sample(n=316, random_state=count, weights=data_text['prop'])
        ballots = _ballots_from_sample(data2)
        election_result = pyrankvote.instant_runoff_voting(candidates, ballots, pick_random_if_blank=False)
        # Read the winner's name directly instead of parsing str(winners).
        champs.append(election_result.get_winners()[0].name)

    # One summary row per split: the split itself plus each candidate's wins.
    row = {'sim': sim, 'dem': dem, 'rep': rep, 'ind': ind, 'third': third}
    for name in candidate_names:
        row['num_' + name] = champs.count(name)
    result_rows.append(row)
    # Record first, then increment, so split labels start at 1 (the previous
    # code incremented first, so its labels started at 2).
    sim = sim+1

# Explicit columns keep the frame usable downstream even with zero splits.
results = pd.DataFrame(
    result_rows,
    columns=['sim', 'dem', 'rep', 'ind', 'third'] + ['num_' + name for name in candidate_names],
)

# Aggregate the ranked-choice results by Democratic share and write them out.
results = results.sort_values(by=['dem', 'rep'])

# Number of splits simulated at each Democratic share (one `results` row per split).
results2 = results.groupby(['dem'])['sim'].count().to_frame()

# Total wins per candidate at each Democratic share. The columns are selected
# with a list: selecting groupby columns with a tuple is rejected by current
# pandas versions.
win_columns = ['num_davis', 'num_gilbert', 'num_heller', 'num_lombardo', 'num_sisolak']
results_wins = results.groupby(['dem'])[win_columns].sum()

final_winners = pd.merge(results2, results_wins, how='inner', on='dem')

# Win share = wins / (number of splits * 500). The hard-coded 500 is the
# divisor used by the original code; presumably the number of simulated
# elections per split - update it if that count changes.
for name in ['davis', 'gilbert', 'heller', 'lombardo', 'sisolak']:
    final_winners[name + '_perc'] = final_winners['num_' + name]/(final_winners['sim']*500)
final_winners = final_winners.reset_index()

# Relative path: written to the current working directory.
final_winners.to_csv('gov22_general.csv', index=False)
